{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "#import numpy \n",
    "import pyspark\n",
    "from pyspark import SparkContext\n",
    "\n",
    "sc=None\n",
    "\n",
    "def map_fun(i):\n",
    "  with tf.Graph().as_default() as g:\n",
    "    hello = tf.constant('Hello, TensorFlow!', name=\"hello_constant\")\n",
    "    with tf.Session() as sess:\n",
    "      return sess.run(hello)\n",
    "\n",
    "if (sc):\n",
    "    print(\"sc exist, use old.\")\n",
    "else:\n",
    "    sc = SparkContext(appName=\"tensorflow\") \n",
    "#help(sc.parallelize)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[b'Hello, TensorFlow!',\n",
       " b'Hello, TensorFlow!',\n",
       " b'Hello, TensorFlow!',\n",
       " b'Hello, TensorFlow!',\n",
       " b'Hello, TensorFlow!',\n",
       " b'Hello, TensorFlow!',\n",
       " b'Hello, TensorFlow!',\n",
       " b'Hello, TensorFlow!',\n",
       " b'Hello, TensorFlow!',\n",
       " b'Hello, TensorFlow!']"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rdd = sc.parallelize(range(10))\n",
    "rdd.map(map_fun).collect()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Extracting MNIST_data/train-images-idx3-ubyte.gz\n",
      "Extracting MNIST_data/train-labels-idx1-ubyte.gz\n",
      "Extracting MNIST_data/t10k-images-idx3-ubyte.gz\n",
      "Extracting MNIST_data/t10k-labels-idx1-ubyte.gz\n"
     ]
    }
   ],
   "source": [
    "from tensorflow.examples.tutorials.mnist import input_data\n",
    "mnist = input_data.read_data_sets(\"MNIST_data/\", one_hot=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<tensorflow.examples.tutorials.mnist.input_data.read_data_sets.<locals>.DataSets object at 0x7fad68465ac8>\n"
     ]
    }
   ],
   "source": [
    "print(mnist)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from __future__ import absolute_import\n",
    "from __future__ import division\n",
    "from __future__ import print_function\n",
    "\n",
    "import gzip\n",
    "import os\n",
    "import sys\n",
    "\n",
    "import tensorflow.python.platform\n",
    "\n",
    "import numpy as np\n",
    "from six.moves import urllib\n",
    "from six.moves import xrange  # pylint: disable=redefined-builtin\n",
    "import tensorflow as tf\n",
    "import itertools"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/'\n",
    "WORK_DIRECTORY = 'data'\n",
    "IMAGE_SIZE = 28\n",
    "NUM_CHANNELS = 1\n",
    "PIXEL_DEPTH = 255\n",
    "NUM_LABELS = 10\n",
    "VALIDATION_SIZE = 5000  # Size of the validation set.\n",
    "SEED = 66478  # Set to None for random seed.\n",
    "BATCH_SIZE = 64\n",
    "NUM_EPOCHS = 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# import os\n",
    "import urllib\n",
    "\n",
    "def maybe_download(filename):\n",
    "  \"\"\"Download the data from Yann's website, unless it's already here.\"\"\"\n",
    "  if not os.path.exists(WORK_DIRECTORY):\n",
    "    os.mkdir(WORK_DIRECTORY)\n",
    "  filepath = os.path.join(WORK_DIRECTORY, filename)\n",
    "  if not os.path.exists(filepath):\n",
    "    filepath, _ = urllib.request.urlretrieve(SOURCE_URL + filename, filepath)\n",
    "    statinfo = os.stat(filepath)\n",
    "    print('Succesfully downloaded', filename, statinfo.st_size, 'bytes.')\n",
    "  return filepath\n",
    "\n",
    "def extract_data(filename, num_images):\n",
    "  \"\"\"Extract the images into a 4D tensor [image index, y, x, channels].\n",
    "\n",
    "  Values are rescaled from [0, 255] down to [-0.5, 0.5].\n",
    "  \"\"\"\n",
    "  print('Extracting', filename)\n",
    "  with gzip.open(filename) as bytestream:\n",
    "    bytestream.read(16)\n",
    "    buf = bytestream.read(IMAGE_SIZE * IMAGE_SIZE * num_images)\n",
    "    data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32)\n",
    "    data = (data - (PIXEL_DEPTH / 2.0)) / PIXEL_DEPTH\n",
    "    data = data.reshape(num_images, IMAGE_SIZE, IMAGE_SIZE, 1)\n",
    "    return data\n",
    "\n",
    "\n",
    "def extract_labels(filename, num_images):\n",
    "  \"\"\"Extract the labels into a 1-hot matrix [image index, label index].\"\"\"\n",
    "  print('Extracting', filename)\n",
    "  with gzip.open(filename) as bytestream:\n",
    "    bytestream.read(8)\n",
    "    buf = bytestream.read(1 * num_images)\n",
    "    labels = np.frombuffer(buf, dtype=np.uint8)\n",
    "    #Convert to dense 1-hot representation.\n",
    "  return (np.arange(NUM_LABELS) == labels[:, None]).astype(np.float32)\n",
    "\n",
    "def fake_data(num_images):\n",
    "  \"\"\"Generate a fake dataset that matches the dimensions of MNIST.\"\"\"\n",
    "  data = np.ndarray(\n",
    "      shape=(num_images, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS),\n",
    "      dtype=np.float32)\n",
    "  labels = np.zeros(shape=(num_images, NUM_LABELS), dtype=np.float32)\n",
    "  for image in xrange(num_images):\n",
    "    label = image % 2\n",
    "    data[image, :, :, 0] = label - 0.5\n",
    "    labels[image, label] = 1.0\n",
    "  return data, labels\n",
    "\n",
    "\n",
    "def error_rate(predictions, labels):\n",
    "  \"\"\"Return the error rate based on dense predictions and 1-hot labels.\"\"\"\n",
    "  return 100.0 - (\n",
    "      100.0 *\n",
    "      np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1)) /\n",
    "      predictions.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#help(maybe_download)\n",
    "#train_data_filename = maybe_download('train-images-idx3-ubyte.gz')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Succesfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.\n",
      "Succesfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.\n",
      "Succesfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.\n"
     ]
    }
   ],
   "source": [
    "# Get the data.\n",
    "train_data_filename = maybe_download('train-images-idx3-ubyte.gz')\n",
    "train_labels_filename = maybe_download('train-labels-idx1-ubyte.gz')\n",
    "test_data_filename = maybe_download('t10k-images-idx3-ubyte.gz')\n",
    "test_labels_filename = maybe_download('t10k-labels-idx1-ubyte.gz')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Extracting data/train-images-idx3-ubyte.gz\n"
     ]
    },
    {
     "ename": "EOFError",
     "evalue": "Compressed file ended before the end-of-stream marker was reached",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mEOFError\u001b[0m                                  Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-10-aa2ce82e88a4>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;31m# Extract it into numpy arrays.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mtrain_data\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mextract_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtrain_data_filename\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m60000\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      3\u001b[0m \u001b[0mtrain_labels\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mextract_labels\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtrain_labels_filename\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m60000\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      4\u001b[0m \u001b[0mtest_data\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mextract_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtest_data_filename\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m10000\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      5\u001b[0m \u001b[0mtest_labels\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mextract_labels\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtest_labels_filename\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m10000\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m<ipython-input-7-e9cbd03780d5>\u001b[0m in \u001b[0;36mextract_data\u001b[1;34m(filename, num_images)\u001b[0m\n\u001b[0;32m     21\u001b[0m   \u001b[1;32mwith\u001b[0m \u001b[0mgzip\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilename\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mbytestream\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     22\u001b[0m     \u001b[0mbytestream\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m16\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 23\u001b[1;33m     \u001b[0mbuf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mbytestream\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mIMAGE_SIZE\u001b[0m \u001b[1;33m*\u001b[0m \u001b[0mIMAGE_SIZE\u001b[0m \u001b[1;33m*\u001b[0m \u001b[0mnum_images\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     24\u001b[0m     \u001b[0mdata\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfrombuffer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbuf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0muint8\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfloat32\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     25\u001b[0m     \u001b[0mdata\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mdata\u001b[0m \u001b[1;33m-\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mPIXEL_DEPTH\u001b[0m \u001b[1;33m/\u001b[0m \u001b[1;36m2.0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m/\u001b[0m \u001b[0mPIXEL_DEPTH\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/media/supermap/Application/OpenAI/anaconda3/envs/tensor/lib/python3.5/gzip.py\u001b[0m in \u001b[0;36mread\u001b[1;34m(self, size)\u001b[0m\n\u001b[0;32m    272\u001b[0m             \u001b[1;32mimport\u001b[0m \u001b[0merrno\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    273\u001b[0m             \u001b[1;32mraise\u001b[0m \u001b[0mOSError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0merrno\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mEBADF\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"read() on write-only GzipFile object\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 274\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_buffer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msize\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    275\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    276\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mread1\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msize\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/media/supermap/Application/OpenAI/anaconda3/envs/tensor/lib/python3.5/_compression.py\u001b[0m in \u001b[0;36mreadinto\u001b[1;34m(self, b)\u001b[0m\n\u001b[0;32m     66\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mreadinto\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     67\u001b[0m         \u001b[1;32mwith\u001b[0m \u001b[0mmemoryview\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mb\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mview\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mview\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcast\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"B\"\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mbyte_view\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 68\u001b[1;33m             \u001b[0mdata\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbyte_view\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     69\u001b[0m             \u001b[0mbyte_view\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m:\u001b[0m\u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     70\u001b[0m         \u001b[1;32mreturn\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/media/supermap/Application/OpenAI/anaconda3/envs/tensor/lib/python3.5/gzip.py\u001b[0m in \u001b[0;36mread\u001b[1;34m(self, size)\u001b[0m\n\u001b[0;32m    478\u001b[0m                 \u001b[1;32mbreak\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    479\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0mbuf\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34mb\"\"\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 480\u001b[1;33m                 raise EOFError(\"Compressed file ended before the \"\n\u001b[0m\u001b[0;32m    481\u001b[0m                                \"end-of-stream marker was reached\")\n\u001b[0;32m    482\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mEOFError\u001b[0m: Compressed file ended before the end-of-stream marker was reached"
     ]
    }
   ],
   "source": [
    "# Extract it into numpy arrays.\n",
    "train_data = extract_data(train_data_filename, 60000)\n",
    "train_labels = extract_labels(train_labels_filename, 60000)\n",
    "test_data = extract_data(test_data_filename, 10000)\n",
    "test_labels = extract_labels(test_labels_filename, 10000)\n",
    "\n",
    "# Generate a validation set.\n",
    "validation_data = train_data[:VALIDATION_SIZE, :, :, :]\n",
    "validation_labels = train_labels[:VALIDATION_SIZE]\n",
    "train_data = train_data[VALIDATION_SIZE:, :, :, :]\n",
    "train_labels = train_labels[VALIDATION_SIZE:]\n",
    "num_epochs = NUM_EPOCHS\n",
    "train_size = train_labels.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
